import os,sys,json,re
import pandas as pd
import numpy as np
The directory containing the .rec files is named 'day' and must be located inside the working directory; otherwise, change the path used below.
# Works if this notebook is in the working directory of the interpreter -
# otherwise, replace 'day' with the directory in which the rec files
# (and logs) are located.
file_list = os.listdir('day')

# The dot before "rec" is escaped so that only a literal ".rec" extension
# matches; an unescaped "." would also accept names such as "notesrec".
r0 = re.compile(r'^.*\.rec$')
# Expected name format: three digits, a dash, "day<N>", then ".rec".
r1 = re.compile(r'^[0-9]{3}-(?P<day>day[0-9]+)\.rec$')

day = ''      # day label shared by the .rec files seen so far ('' until the first match)
flag = False  # becomes True once at least one .rec file has been seen

for file in file_list:  # each entry is a file name, not a path
    if not r0.match(file):
        continue  # not a .rec file - ignore it
    flag = True
    results = r1.search(file)  # None when the name does not follow the format
    if not results:
        print("incorrectly formatted .rec")
        print(file)
        break
    if day == '':
        day = results.group('day')  # remember the day of the first .rec file
    elif results.group('day') != day:
        # Every .rec file in the folder must belong to the same day.
        print("incorrect day")
        break

if not flag:
    print('No rec files found')

# All .rec file names in sorted order.
rec_list = sorted(rec for rec in file_list if r0.match(rec))
Creates a DataFrame from the sorted .rec files in the 'day' folder, whose names follow the format ###-day#.rec
def getval(val):
    """Return ``val`` unchanged.

    The previous body wrapped ``return val`` in ``try``/bare ``except``, but
    returning an existing name cannot raise, so the handler (a no-op ``None``
    expression) was unreachable. The function has always been an identity.
    """
    return val
# One accumulator list per output column; every matching visual appends one
# value to each list, so they always stay the same length.
timestamps, uuids, pos_xs, pos_ys = [], [], [], []
neighbor_xs, neighbor_ys, phases, last_rec = [], [], [], []
# last_rec signifies whether the timestamp belongs to the last recording of
# the session - used to accumulate the timestamps
nano = 0  # offset added to the timestamps of the current .rec file

for rec in rec_list:
    # Each line of a .rec file is parsed as one JSON document.
    with open('day/{}'.format(rec)) as rec_file:
        records = [json.loads(line) for line in rec_file.readlines()]

    for record in records:
        if record['message'] != '_SAVE_LIVEFRAME':
            continue
        for visual in record['data']['visuals']:
            # Keep only visuals that carry a uuid and are of type 'Mesh'.
            if 'uuid' not in visual or visual.get('type') != 'Mesh':
                continue
            uuids.append(visual['uuid'])
            timestamps.append(record['timestamp'] + nano)
            pos_xs.append(float(visual['position']['x']))
            pos_ys.append(float(visual['position']['y']))

            # Neighbor endpoint and phase come from the optional 'dashedLine'
            # entry; any missing piece is recorded as None.
            dashed = visual.get('dashedLine', {})
            neighbor_xs.append(float(dashed['x2']) if 'x2' in dashed else None)
            neighbor_ys.append(float(dashed['y2']) if 'y2' in dashed else None)
            has_width = 'settings' in dashed and 'width' in dashed['settings']
            phases.append(dashed['settings']['width'] if has_width else None)
            last_rec.append(0)

    print(rec)
    if last_rec:
        # Flag the most recent row as the end of this file and carry its
        # timestamp forward as the offset for the next file.
        last_rec[-1] = 1
        nano = timestamps[-1]

df_dict = {
    'uuids': uuids,
    'timestamps': timestamps,
    'pos_xs': pos_xs,
    'pos_ys': pos_ys,
    'neighbor_xs': neighbor_xs,
    'neighbor_ys': neighbor_ys,
    'phases': phases,
    'last_rec': last_rec,
}
df = pd.DataFrame(df_dict)
df
The unique UUIDs present in the dataset of the day
# Unique uuids across the entire dataframe, together with how many there are.
unique_uuids = df['uuids'].unique()
unique_uuids, len(unique_uuids)
# Bokeh provides the plotting used in the cells below (`figure`, `show`).
from bokeh.io import output_notebook
from bokeh.plotting import figure, output_file, show
# Direct Bokeh output to the notebook before any figure is shown.
output_notebook()
Creates the timestamp and velocity series to visualize for one user
# Velocity trace for a single user: the first uuid that appears in the frame.
test_frame = df['uuids'].unique()[0]
user_rows = df[df['uuids'] == test_frame]

# Row-to-row differences in time and position for this user.
u1t = user_rows['timestamps']
time_diff = u1t.diff()
x_diff = user_rows['pos_xs'].diff()
y_diff = user_rows['pos_ys'].diff()

# Euclidean distance travelled between consecutive rows, per unit of time.
dist_diff = np.sqrt(x_diff**2 + y_diff**2)
velocity = dist_diff / time_diff

p = figure(x_axis_label='time', y_axis_label='velocity')
p.line(u1t, velocity)
show(p)
Visualizes the velocities of all UUIDs present
from bokeh.palettes import inferno

# Build one (timestamps, velocity) pair of series per uuid.
xs = []
ys = []
for u in df['uuids'].unique():
    test_frame = u
    user_rows = df[df['uuids'] == test_frame]
    u1t = user_rows['timestamps']
    time_diff = u1t.diff()
    x_diff = user_rows['pos_xs'].diff()
    y_diff = user_rows['pos_ys'].diff()
    dist_diff = np.sqrt(x_diff**2 + y_diff**2)
    velocity = dist_diff / time_diff
    xs.append(u1t)
    ys.append(velocity)

# The per-uuid series keep the original row index, so concatenating them
# lines the velocities back up with the rows of df.
df['velocity'] = pd.concat(ys)

n_uuids = len(df['uuids'].unique())
p = figure(x_axis_label='time', y_axis_label='velocity', y_range=[0, 1e-7])
p.multi_line(xs, ys, color=inferno(n_uuids), alpha=[.5] * n_uuids)

# Identifying the transition points of the REC files
cutoff = df[df['last_rec'] == 1][['velocity', 'timestamps']]
p.circle(cutoff['timestamps'], cutoff['velocity'], fill_alpha=.3, size=7)
show(p)

df.to_csv('frame.csv')
Visualizes the distances between UUIDs and their neighbors
# Build one (timestamps, distance-to-neighbor) pair of series per uuid.
xs = []
ys = []
for u in df['uuids'].unique():
    test_frame = u
    # Filter this uuid's rows once instead of once per column. The unused
    # timestamp diff that was computed here has been dropped - the distance
    # to the neighbor does not depend on it.
    user_rows = df[df['uuids'] == test_frame]
    x_diff = user_rows['pos_xs'] - user_rows['neighbor_xs']
    y_diff = user_rows['pos_ys'] - user_rows['neighbor_ys']
    dist_neighbor = np.sqrt(x_diff**2 + y_diff**2)
    u1t = user_rows['timestamps']
    xs.append(u1t)
    ys.append(dist_neighbor)

# The per-uuid series keep the original row index, so concatenating them
# lines the distances back up with the rows of df.
df['dist_neighbor'] = pd.concat(ys)

n_uuids = len(df['uuids'].unique())
p = figure(x_axis_label='Time', y_axis_label='Distance from Neighbor')
p.multi_line(xs, ys, color=inferno(n_uuids), alpha=[.5] * n_uuids)

# Identifying the transition points of the REC files
cutoff = df[df['last_rec'] == 1][['dist_neighbor', 'timestamps']]
p.circle(cutoff['timestamps'], cutoff['dist_neighbor'], fill_alpha=.3, size=7)
show(p)
The time series is resampled at 1-minute intervals
from collections import Counter

# Maps the raw values stored in df['phases'] to phase names; values that
# are not keys of this dict become NaN after .map().
phase_dict = {1: 'gas', 30: 'ice', 15: 'liquid', 2: 'impossible'}

# Index a copy of the frame by its timestamps so it can be resampled.
# NOTE(review): TimedeltaIndex is given the raw numeric timestamps -
# presumably nanoseconds; confirm against the recording format.
time_df = df.copy().set_index(pd.TimedeltaIndex(df['timestamps']))
time_df['phases'] = time_df['phases'].map(phase_dict)

# Collect all phase labels that fall into each 1-minute bin into a list.
time_df_res = time_df.resample('1min')
time_df_res = time_df_res.aggregate({'phases': lambda x: list(x)})
time_df_res = pd.DataFrame(time_df_res)

# Mark empty bins as NaN and drop them. `np.nan` replaces `pd.np.nan`: the
# `pandas.np` alias is deprecated and no longer exists in current pandas.
time_df_res['phases'] = time_df_res['phases'].apply(lambda x: np.nan if not len(x) else x)
time_df_res = time_df_res[~time_df_res['phases'].isnull()]
def p_phase(x):
    """Return the share of 'gas', 'ice' and 'liquid' labels in one row.

    Parameters
    ----------
    x : pandas.Series or sequence
        A row whose first element is a list of phase labels.

    Returns
    -------
    pandas.Series
        Entries 'p_gas', 'p_ice' and 'p_liquid', each the count of that
        label divided by the total number of labels in the list. The total
        includes every label, not only these three, so the shares need not
        sum to 1.

    Raises
    ------
    ZeroDivisionError
        If the list of labels is empty.
    """
    # A row produced by DataFrame.apply(axis=1) is a label-indexed Series;
    # `x[0]` on it relies on positional fallback, which pandas has
    # deprecated. Use explicit positional access when it is available and
    # keep plain indexing for ordinary sequences.
    labels = x.iloc[0] if hasattr(x, 'iloc') else x[0]
    count = Counter(labels)
    sum_val = sum(count.values())
    count_dict = {
        'p_gas': count['gas'] / sum_val,
        'p_ice': count['ice'] / sum_val,
        'p_liquid': count['liquid'] / sum_val,
    }
    return pd.Series(count_dict)
# Apply p_phase to every row of the resampled frame; each row yields one
# Series of phase shares, giving one row of phase_prob per time bin.
phase_prob = time_df_res.apply(p_phase, axis='columns')
The percentage of UUIDs in each phase (solid, liquid, gas) is plotted against time, using the frame resampled at 1-minute intervals
from bokeh.models.formatters import DatetimeTickFormatter

# One line per phase share, all drawn against the resampled time index.
p = figure(
    x_axis_type='datetime',
    x_axis_label='Time',
    y_axis_label='Percent of children in Phase',
)
phase_columns = ['p_gas', 'p_liquid', 'p_ice']
p.multi_line(
    [phase_prob.index for _ in phase_columns],
    [phase_prob[column] for column in phase_columns],
    line_color=['red', 'green', 'blue'],
)

# Use the same "%s" tick format at every listed time scale.
# NOTE(review): the `formats=` keyword matches an older Bokeh API - confirm
# against the installed Bokeh version.
tick_formats = {scale: ["%s"] for scale in ('hours', 'days', 'months', 'years')}
p.xaxis.formatter = DatetimeTickFormatter(formats=tick_formats)
show(p)